

import braceexpand

import webdataset as wds
# Shard specification covering two S3 prefixes (laion400m and laion2B-multi),
# each read through an `aws s3 cp ... -` pipe. The two brace-range patterns
# are joined with "::" into a single spec string.
# An earlier variant used only the laion400m pattern:
#   pipe:aws s3 cp s3://s-datasets/laion400m/laion400m-dat-release/{00000..41455}.tar -
shards = (
    "pipe:aws s3 cp s3://s-datasets/laion400m/laion400m-dat-release/{00000..41455}.tar -"
    "::"
    "pipe:aws s3 cp s3://s-datasets/laion5b/laion2B-multi-data/{000000..162417}.tar -"
)

# Expand the spec into one entry per shard using webdataset's helper.
# NOTE(review): a plain `list(braceexpand.braceexpand(shards))` was the
# alternative tried here; presumably it does not treat "::" as a separator
# between specs — confirm before switching back.
shards_list = wds.shardlists.expand_urls(shards)

# Sanity check: total shard count, then the first 3 and last 10 entries.
print(len(shards_list))
print(shards_list[:3], shards_list[-10:], sep="\n")
